/*
 * LZMA2Options
 *
 * Author: Lasse Collin <lasse.collin@tukaani.org>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

package org.tukaani.xz;

import java.io.IOException;
import java.io.InputStream;

import org.tukaani.xz.lz.LZEncoder;
import org.tukaani.xz.lzma.LZMAEncoder;

/**
 * LZMA2 compression options.
 * <p>
 * While this allows setting the LZMA2 compression options in detail, often you
 * only need <code>LZMA2Options()</code> or <code>LZMA2Options(int)</code>.
 */
public class LZMA2Options extends FilterOptions
{
  /**
   * Minimum valid compression preset level is 0.
   */
  public static final int PRESET_MIN = 0;

  /**
   * Maximum valid compression preset level is 9.
   */
  public static final int PRESET_MAX = 9;

  /**
   * Default compression preset level is 6.
   */
  public static final int PRESET_DEFAULT = 6;

  /**
   * Minimum dictionary size is 4 KiB.
   */
  public static final int DICT_SIZE_MIN = 4096;

  /**
   * Maximum dictionary size for compression is 768 MiB.
   * <p>
   * The decompressor supports bigger dictionaries, up to almost 2 GiB. With HC4
   * the encoder would support dictionaries bigger than 768 MiB. The 768 MiB
   * limit comes from the current implementation of BT4 where we would otherwise
   * hit the limits of signed ints in array indexing.
   * <p>
   * If you really need bigger dictionary for decompression, use
   * {@link LZMA2InputStream} directly.
   */
  public static final int DICT_SIZE_MAX = 768 << 20;

  /**
   * The default dictionary size is 8 MiB.
   */
  public static final int DICT_SIZE_DEFAULT = 8 << 20;

  /**
   * Maximum value for lc + lp is 4.
   */
  public static final int LC_LP_MAX = 4;

  /**
   * The default number of literal context bits is 3.
   */
  public static final int LC_DEFAULT = 3;

  /**
   * The default number of literal position bits is 0.
   */
  public static final int LP_DEFAULT = 0;

  /**
   * Maximum value for pb is 4.
   */
  public static final int PB_MAX = 4;

  /**
   * The default number of position bits is 2.
   */
  public static final int PB_DEFAULT = 2;

  /**
   * Compression mode: uncompressed. The data is wrapped into a LZMA2 stream
   * without compression.
   */
  public static final int MODE_UNCOMPRESSED = 0;

  /**
   * Compression mode: fast. This is usually combined with a hash chain match
   * finder.
   */
  public static final int MODE_FAST = LZMAEncoder.MODE_FAST;

  /**
   * Compression mode: normal. This is usually combined with a binary tree match
   * finder.
   */
  public static final int MODE_NORMAL = LZMAEncoder.MODE_NORMAL;

  /**
   * Minimum value for <code>niceLen</code> is 8.
   */
  public static final int NICE_LEN_MIN = 8;

  /**
   * Maximum value for <code>niceLen</code> is 273.
   */
  public static final int NICE_LEN_MAX = 273;

  /**
   * Match finder: Hash Chain 2-3-4
   */
  public static final int MF_HC4 = LZEncoder.MF_HC4;

  /**
   * Match finder: Binary tree 2-3-4
   */
  public static final int MF_BT4 = LZEncoder.MF_BT4;

  // Dictionary size in bytes for each preset level, indexed by preset (0-9).
  private static final int[] presetToDictSize =
  {1 << 18, 1 << 20, 1 << 21, 1 << 22, 1 << 22, 1 << 23, 1 << 23, 1 << 24, 1 << 25, 1 << 26};

  // Match finder depth limit for the fast presets, indexed by preset (0-3).
  // The slower presets use 0, i.e. let the match finder pick the limit.
  private static final int[] presetToDepthLimit = {4, 8, 24, 48};

  private int dictSize;

  // Stored by reference; null means that no preset dictionary is in use.
  private byte[] presetDict = null;

  private int lc;

  private int lp;

  private int pb;

  private int mode;

  private int niceLen;

  private int mf;

  private int depthLimit;


  /**
   * Creates new LZMA2 options and sets them to the default values. This is
   * equivalent to <code>LZMA2Options(PRESET_DEFAULT)</code>.
   */
  public LZMA2Options()
  {
    try
    {
      setPreset(PRESET_DEFAULT);
    } catch (UnsupportedOptionsException e)
    {
      // PRESET_DEFAULT is always a valid preset, so this cannot happen.
      // Keep the cause anyway so that a broken invariant is diagnosable.
      assert false;
      throw new RuntimeException(e);
    }
  }


  /**
   * Creates new LZMA2 options and sets them to the given preset.
   * 
   * @param preset preset level in the range [PRESET_MIN, PRESET_MAX]
   * @throws UnsupportedOptionsException <code>preset</code> is not supported
   */
  public LZMA2Options(int preset) throws UnsupportedOptionsException
  {
    setPreset(preset);
  }


  /**
   * Creates new LZMA2 options and sets them to the given custom values. Each
   * value is validated by the corresponding setter.
   * 
   * @throws UnsupportedOptionsException unsupported options were specified
   */
  public LZMA2Options(int dictSize, int lc, int lp, int pb, int mode, int niceLen, int mf, int depthLimit)
      throws UnsupportedOptionsException
  {
    setDictSize(dictSize);
    setLcLp(lc, lp);
    setPb(pb);
    setMode(mode);
    setNiceLen(niceLen);
    setMatchFinder(mf);
    setDepthLimit(depthLimit);
  }


  /**
   * Sets the compression options to the given preset.
   * <p>
   * The presets 0-3 are fast presets with medium compression. The presets 4-6
   * are fairly slow presets with high compression. The default preset (
   * <code>PRESET_DEFAULT</code>) is 6.
   * <p>
   * The presets 7-9 are like the preset 6 but use bigger dictionaries and have
   * higher compressor and decompressor memory requirements. Unless the
   * uncompressed size of the file exceeds 8&nbsp;MiB, 16&nbsp;MiB, or
   * 32&nbsp;MiB, it is a waste of memory to use the presets 7, 8, or 9,
   * respectively.
   * <p>
   * This resets lc, lp, pb, the dictionary size, the mode, the match finder,
   * the nice length and the depth limit. The preset dictionary is left
   * untouched.
   * 
   * @param preset preset level in the range [PRESET_MIN, PRESET_MAX]
   * @throws UnsupportedOptionsException <code>preset</code> is not supported
   */
  public void setPreset(int preset) throws UnsupportedOptionsException
  {
    if (preset < PRESET_MIN || preset > PRESET_MAX)
      throw new UnsupportedOptionsException("Unsupported preset: " + preset);

    lc = LC_DEFAULT;
    lp = LP_DEFAULT;
    pb = PB_DEFAULT;
    dictSize = presetToDictSize[preset];

    if (preset <= 3)
    {
      // Fast presets: hash chain match finder with an explicit depth limit.
      mode = MODE_FAST;
      mf = MF_HC4;
      niceLen = (preset <= 1) ? 128 : NICE_LEN_MAX;
      depthLimit = presetToDepthLimit[preset];
    } else
    {
      // Slower presets: binary tree match finder; a depth limit of 0 lets
      // the match finder derive the limit from niceLen.
      mode = MODE_NORMAL;
      mf = MF_BT4;
      niceLen = (preset == 4) ? 16 : (preset == 5) ? 32 : 64;
      depthLimit = 0;
    }
  }


  /**
   * Sets the dictionary size in bytes.
   * <p>
   * The dictionary (or history buffer) holds the most recently seen
   * uncompressed data. Bigger dictionary usually means better compression.
   * However, using a dictionary bigger than the size of the uncompressed data
   * is a waste of memory.
   * <p>
   * Any value in the range [DICT_SIZE_MIN, DICT_SIZE_MAX] is valid, but sizes
   * of 2^n and 2^n&nbsp;+&nbsp;2^(n-1) bytes are somewhat recommended.
   * 
   * @throws UnsupportedOptionsException <code>dictSize</code> is not supported
   */
  public void setDictSize(int dictSize) throws UnsupportedOptionsException
  {
    if (dictSize < DICT_SIZE_MIN)
      throw new UnsupportedOptionsException("LZMA2 dictionary size must be at least 4 KiB: " + dictSize + " B");

    if (dictSize > DICT_SIZE_MAX)
      throw new UnsupportedOptionsException("LZMA2 dictionary size must not exceed " + (DICT_SIZE_MAX >> 20) + " MiB: "
          + dictSize + " B");

    this.dictSize = dictSize;
  }


  /**
   * Gets the dictionary size in bytes.
   */
  public int getDictSize()
  {
    return dictSize;
  }


  /**
   * Sets a preset dictionary. Use null to disable the use of a preset
   * dictionary. By default there is no preset dictionary.
   * <p>
   * <b>The .xz format doesn't support a preset dictionary for now. Do not set a
   * preset dictionary unless you use raw LZMA2.</b>
   * <p>
   * Preset dictionary can be useful when compressing many similar, relatively
   * small chunks of data independently from each other. A preset dictionary
   * should contain typical strings that occur in the files being compressed.
   * The most probable strings should be near the end of the preset dictionary.
   * The preset dictionary used for compression is also needed for
   * decompression.
   * <p>
   * The array is stored by reference; no copy is made.
   */
  public void setPresetDict(byte[] presetDict)
  {
    this.presetDict = presetDict;
  }


  /**
   * Gets the preset dictionary. The returned array is the stored reference,
   * not a copy; it is null if no preset dictionary has been set.
   */
  public byte[] getPresetDict()
  {
    return presetDict;
  }


  /**
   * Sets the number of literal context bits and literal position bits.
   * <p>
   * The sum of <code>lc</code> and <code>lp</code> is limited to 4. Trying to
   * exceed it will throw an exception. This function lets you change both at
   * the same time.
   * 
   * @throws UnsupportedOptionsException <code>lc</code> and <code>lp</code> are
   *           invalid
   */
  public void setLcLp(int lc, int lp) throws UnsupportedOptionsException
  {
    if (lc < 0 || lp < 0 || lc > LC_LP_MAX || lp > LC_LP_MAX || lc + lp > LC_LP_MAX)
      throw new UnsupportedOptionsException("lc + lp must not exceed " + LC_LP_MAX + ": " + lc + " + " + lp);

    this.lc = lc;
    this.lp = lp;
  }


  /**
   * Sets the number of literal context bits.
   * <p>
   * All bytes that cannot be encoded as matches are encoded as literals. That
   * is, literals are simply 8-bit bytes that are encoded one at a time.
   * <p>
   * The literal coding makes an assumption that the highest <code>lc</code>
   * bits of the previous uncompressed byte correlate with the next byte. For
   * example, in typical English text, an upper-case letter is often followed by
   * a lower-case letter, and a lower-case letter is usually followed by another
   * lower-case letter. In the US-ASCII character set, the highest three bits
   * are 010 for upper-case letters and 011 for lower-case letters. When
   * <code>lc</code> is at least 3, the literal coding can take advantage of
   * this property in the uncompressed data.
   * <p>
   * The default value (3) is usually good. If you want maximum compression, try
   * <code>setLc(4)</code>. Sometimes it helps a little, and sometimes it makes
   * compression worse. If it makes it worse, test for example
   * <code>setLc(2)</code> too.
   * 
   * @throws UnsupportedOptionsException <code>lc</code> is invalid, or the sum
   *           of <code>lc</code> and <code>lp</code> exceed LC_LP_MAX
   */
  public void setLc(int lc) throws UnsupportedOptionsException
  {
    // The parameter shadows the field: new lc, current lp.
    setLcLp(lc, lp);
  }


  /**
   * Sets the number of literal position bits.
   * <p>
   * This affects what kind of alignment in the uncompressed data is assumed
   * when encoding literals. See {@link #setPb(int) setPb} for more information
   * about alignment.
   * 
   * @throws UnsupportedOptionsException <code>lp</code> is invalid, or the sum
   *           of <code>lc</code> and <code>lp</code> exceed LC_LP_MAX
   */
  public void setLp(int lp) throws UnsupportedOptionsException
  {
    // The parameter shadows the field: current lc, new lp.
    setLcLp(lc, lp);
  }


  /**
   * Gets the number of literal context bits.
   */
  public int getLc()
  {
    return lc;
  }


  /**
   * Gets the number of literal position bits.
   */
  public int getLp()
  {
    return lp;
  }


  /**
   * Sets the number of position bits.
   * <p>
   * This affects what kind of alignment in the uncompressed data is assumed in
   * general. The default (2) means four-byte alignment (2^<code>pb</code> = 2^2
   * = 4), which is often a good choice when there's no better guess.
   * <p>
   * When the alignment is known, setting the number of position bits
   * accordingly may reduce the file size a little. For example with text files
   * having one-byte alignment (US-ASCII, ISO-8859-*, UTF-8), using
   * <code>setPb(0)</code> can improve compression slightly. For UTF-16 text,
   * <code>setPb(1)</code> is a good choice. If the alignment is an odd number
   * like 3 bytes, <code>setPb(0)</code> might be the best choice.
   * <p>
   * Even though the assumed alignment can be adjusted with <code>setPb</code>
   * and <code>setLp</code>, LZMA2 still slightly favors 16-byte alignment. It
   * might be worth taking into account when designing file formats that are
   * likely to be often compressed with LZMA2.
   * 
   * @throws UnsupportedOptionsException <code>pb</code> is invalid
   */
  public void setPb(int pb) throws UnsupportedOptionsException
  {
    if (pb < 0 || pb > PB_MAX)
      throw new UnsupportedOptionsException("pb must not exceed " + PB_MAX + ": " + pb);

    this.pb = pb;
  }


  /**
   * Gets the number of position bits.
   */
  public int getPb()
  {
    return pb;
  }


  /**
   * Sets the compression mode.
   * <p>
   * This specifies the method to analyze the data produced by a match finder.
   * The default is <code>MODE_FAST</code> for presets 0-3 and
   * <code>MODE_NORMAL</code> for presets 4-9.
   * <p>
   * Usually <code>MODE_FAST</code> is used with Hash Chain match finders and
   * <code>MODE_NORMAL</code> with Binary Tree match finders. This is also what
   * the presets do.
   * <p>
   * The special mode <code>MODE_UNCOMPRESSED</code> doesn't try to compress the
   * data at all (and doesn't use a match finder) and will simply wrap it in
   * uncompressed LZMA2 chunks.
   * 
   * @throws UnsupportedOptionsException <code>mode</code> is not supported
   */
  public void setMode(int mode) throws UnsupportedOptionsException
  {
    if (mode < MODE_UNCOMPRESSED || mode > MODE_NORMAL)
      throw new UnsupportedOptionsException("Unsupported compression mode: " + mode);

    this.mode = mode;
  }


  /**
   * Gets the compression mode.
   */
  public int getMode()
  {
    return mode;
  }


  /**
   * Sets the nice length of matches. Once a match of at least
   * <code>niceLen</code> bytes is found, the algorithm stops looking for better
   * matches. Higher values tend to give better compression at the expense of
   * speed. The default depends on the preset.
   * 
   * @throws UnsupportedOptionsException <code>niceLen</code> is invalid
   */
  public void setNiceLen(int niceLen) throws UnsupportedOptionsException
  {
    if (niceLen < NICE_LEN_MIN)
      throw new UnsupportedOptionsException("Minimum nice length of matches is " + NICE_LEN_MIN + " bytes: " + niceLen);

    if (niceLen > NICE_LEN_MAX)
      throw new UnsupportedOptionsException("Maximum nice length of matches is " + NICE_LEN_MAX + ": " + niceLen);

    this.niceLen = niceLen;
  }


  /**
   * Gets the nice length of matches.
   */
  public int getNiceLen()
  {
    return niceLen;
  }


  /**
   * Sets the match finder type.
   * <p>
   * Match finder has a major effect on compression speed, memory usage, and
   * compression ratio. Usually Hash Chain match finders are faster than Binary
   * Tree match finders. The default depends on the preset: 0-3 use
   * <code>MF_HC4</code> and 4-9 use <code>MF_BT4</code>.
   * 
   * @throws UnsupportedOptionsException <code>mf</code> is not supported
   */
  public void setMatchFinder(int mf) throws UnsupportedOptionsException
  {
    if (mf != MF_HC4 && mf != MF_BT4)
      throw new UnsupportedOptionsException("Unsupported match finder: " + mf);

    this.mf = mf;
  }


  /**
   * Gets the match finder type.
   */
  public int getMatchFinder()
  {
    return mf;
  }


  /**
   * Sets the match finder search depth limit.
   * <p>
   * The default is a special value of <code>0</code> which indicates that the
   * depth limit should be automatically calculated by the selected match finder
   * from the nice length of matches.
   * <p>
   * Reasonable depth limit for Hash Chain match finders is 4-100 and 16-1000
   * for Binary Tree match finders. Using very high values can make the
   * compressor extremely slow with some files. Avoid settings higher than 1000
   * unless you are prepared to interrupt the compression in case it is taking
   * far too long.
   * 
   * @throws UnsupportedOptionsException <code>depthLimit</code> is invalid
   */
  public void setDepthLimit(int depthLimit) throws UnsupportedOptionsException
  {
    if (depthLimit < 0)
      throw new UnsupportedOptionsException("Depth limit cannot be negative: " + depthLimit);

    this.depthLimit = depthLimit;
  }


  /**
   * Gets the match finder search depth limit.
   */
  public int getDepthLimit()
  {
    return depthLimit;
  }


  /**
   * Gets the encoder memory usage for these options, as computed by
   * <code>LZMA2OutputStream.getMemoryUsage</code>.
   */
  public int getEncoderMemoryUsage()
  {
    return LZMA2OutputStream.getMemoryUsage(this);
  }


  /**
   * Creates a new {@link LZMA2OutputStream} that writes to <code>out</code> and
   * is configured with these options.
   * 
   * @param out the stream that the new stream wraps
   */
  public FinishableOutputStream getOutputStream(FinishableOutputStream out)
  {
    return new LZMA2OutputStream(out, this);
  }


  /**
   * Gets how much memory the LZMA2 decoder will need to decompress the data
   * that was encoded with these options and stored in a .xz file.
   * <p>
   * The returned value may be bigger than the value returned by a direct call
   * to {@link LZMA2InputStream#getMemoryUsage(int)} if the dictionary size is
   * not 2^n or 2^n&nbsp;+&nbsp;2^(n-1) bytes. This is because the .xz headers
   * store the dictionary size in such a format and other values are rounded up
   * to the next such value. Such rounding is harmless except it might waste
   * some memory if an unusual dictionary size is used.
   * <p>
   * If you use raw LZMA2 streams and unusual dictionary size, call
   * {@link LZMA2InputStream#getMemoryUsage} directly to get raw decoder memory
   * requirements.
   */
  public int getDecoderMemoryUsage()
  {
    // Round the dictionary size up to the next 2^n or 2^n + 2^(n-1).
    int d = dictSize - 1;
    d |= d >>> 2;
    d |= d >>> 3;
    d |= d >>> 4;
    d |= d >>> 8;
    d |= d >>> 16;
    return LZMA2InputStream.getMemoryUsage(d + 1);
  }


  /**
   * Creates a new {@link LZMA2InputStream} that reads from <code>in</code>
   * using the dictionary size of these options. Only the dictionary size is
   * passed on; the preset dictionary is not given to the decoder here.
   * 
   * @param in the stream to read the compressed data from
   * @throws IOException presumably propagated from the
   *           <code>LZMA2InputStream</code> constructor
   */
  public InputStream getInputStream(InputStream in) throws IOException
  {
    return new LZMA2InputStream(in, dictSize);
  }


  /**
   * Creates the {@link LZMA2Encoder} for these options.
   */
  FilterEncoder getFilterEncoder()
  {
    return new LZMA2Encoder(this);
  }


  /**
   * Returns the object produced by <code>super.clone()</code>. This method
   * does not duplicate the preset dictionary array itself.
   */
  public Object clone()
  {
    try
    {
      return super.clone();
    } catch (CloneNotSupportedException e)
    {
      // Not expected to happen; keep the cause so that it is diagnosable.
      assert false;
      throw new RuntimeException(e);
    }
  }
}
